bitkeeper revision 1.1691 (42a5bd892a-21ifB8kNwgNvEid-K_Q)
authorkaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Tue, 7 Jun 2005 15:30:17 +0000 (15:30 +0000)
committerkaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Tue, 7 Jun 2005 15:30:17 +0000 (15:30 +0000)
Reove IRQ balancer from Xen. It is unused, and balancing will be done
by the guests themselves.
Signed-off-by: Keir Fraser <keir@xensource.com>
xen/arch/x86/io_apic.c

index a0020c7ec2d74b2e94d8c0a4ff14af82f92ceb65..c582b7679f7b2c67891e8d65150fc5b1201b932e 100644 (file)
@@ -232,440 +232,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
        spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-#if defined(CONFIG_IRQBALANCE)
-# include <asm/processor.h>    /* kernel_thread() */
-# include <xen/kernel_stat.h>  /* kstat */
-# include <xen/xmalloc.h>      /* kmalloc() */
-# include <xen/timer.h>        /* time_after() */
-# ifdef CONFIG_BALANCED_IRQ_DEBUG
-#  define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
-#  define Dprintk(x...) do { TDprintk(x); } while (0)
-# else
-#  define TDprintk(x...) 
-#  define Dprintk(x...) 
-# endif
-
-cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
-
-#define IRQBALANCE_CHECK_ARCH -999
-static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
-static int physical_balance = 0;
-
-struct irq_cpu_info {
-       unsigned long * last_irq;
-       unsigned long * irq_delta;
-       unsigned long irq;
-} irq_cpu_data[NR_CPUS];
-
-#define CPU_IRQ(cpu)           (irq_cpu_data[cpu].irq)
-#define LAST_CPU_IRQ(cpu,irq)   (irq_cpu_data[cpu].last_irq[irq])
-#define IRQ_DELTA(cpu,irq)     (irq_cpu_data[cpu].irq_delta[irq])
-
-#define IDLE_ENOUGH(cpu,now) \
-               (idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
-
-#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
-
-#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
-
-#define MAX_BALANCED_IRQ_INTERVAL      (5*HZ)
-#define MIN_BALANCED_IRQ_INTERVAL      (HZ/2)
-#define BALANCED_IRQ_MORE_DELTA                (HZ/10)
-#define BALANCED_IRQ_LESS_DELTA                (HZ)
-
-long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
-
-static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
-                       unsigned long now, int direction)
-{
-       int search_idle = 1;
-       int cpu = curr_cpu;
-
-       goto inside;
-
-       do {
-               if (unlikely(cpu == curr_cpu))
-                       search_idle = 0;
-inside:
-               if (direction == 1) {
-                       cpu++;
-                       if (cpu >= NR_CPUS)
-                               cpu = 0;
-               } else {
-                       cpu--;
-                       if (cpu == -1)
-                               cpu = NR_CPUS-1;
-               }
-       } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
-                       (search_idle && !IDLE_ENOUGH(cpu,now)));
-
-       return cpu;
-}
-
-static inline void balance_irq(int cpu, int irq)
-{
-       unsigned long now = jiffies;
-       cpumask_t allowed_mask;
-       unsigned int new_cpu;
-               
-       if (irqbalance_disabled)
-               return; 
-
-       cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
-       new_cpu = move(cpu, allowed_mask, now, 1);
-       if (cpu != new_cpu) {
-               irq_desc_t *desc = irq_desc + irq;
-               unsigned long flags;
-
-               spin_lock_irqsave(&desc->lock, flags);
-               pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
-               spin_unlock_irqrestore(&desc->lock, flags);
-       }
-}
-
-static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
-{
-       int i, j;
-       Dprintk("Rotating IRQs among CPUs.\n");
-       for (i = 0; i < NR_CPUS; i++) {
-               for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
-                       if (!irq_desc[j].action)
-                               continue;
-                       /* Is it a significant load ?  */
-                       if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
-                                               useful_load_threshold)
-                               continue;
-                       balance_irq(i, j);
-               }
-       }
-       balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
-               balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);       
-       return;
-}
-
-static void do_irq_balance(void)
-{
-       int i, j;
-       unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
-       unsigned long move_this_load = 0;
-       int max_loaded = 0, min_loaded = 0;
-       int load;
-       unsigned long useful_load_threshold = balanced_irq_interval + 10;
-       int selected_irq;
-       int tmp_loaded, first_attempt = 1;
-       unsigned long tmp_cpu_irq;
-       unsigned long imbalance = 0;
-       cpumask_t allowed_mask, target_cpu_mask, tmp;
-
-       for (i = 0; i < NR_CPUS; i++) {
-               int package_index;
-               CPU_IRQ(i) = 0;
-               if (!cpu_online(i))
-                       continue;
-               package_index = CPU_TO_PACKAGEINDEX(i);
-               for (j = 0; j < NR_IRQS; j++) {
-                       unsigned long value_now, delta;
-                       /* Is this an active IRQ? */
-                       if (!irq_desc[j].action)
-                               continue;
-                       if ( package_index == i )
-                               IRQ_DELTA(package_index,j) = 0;
-                       /* Determine the total count per processor per IRQ */
-                       value_now = (unsigned long) kstat_cpu(i).irqs[j];
-
-                       /* Determine the activity per processor per IRQ */
-                       delta = value_now - LAST_CPU_IRQ(i,j);
-
-                       /* Update last_cpu_irq[][] for the next time */
-                       LAST_CPU_IRQ(i,j) = value_now;
-
-                       /* Ignore IRQs whose rate is less than the clock */
-                       if (delta < useful_load_threshold)
-                               continue;
-                       /* update the load for the processor or package total */
-                       IRQ_DELTA(package_index,j) += delta;
-
-                       /* Keep track of the higher numbered sibling as well */
-                       if (i != package_index)
-                               CPU_IRQ(i) += delta;
-                       /*
-                        * We have sibling A and sibling B in the package
-                        *
-                        * cpu_irq[A] = load for cpu A + load for cpu B
-                        * cpu_irq[B] = load for cpu B
-                        */
-                       CPU_IRQ(package_index) += delta;
-               }
-       }
-       /* Find the least loaded processor package */
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_online(i))
-                       continue;
-               if (i != CPU_TO_PACKAGEINDEX(i))
-                       continue;
-               if (min_cpu_irq > CPU_IRQ(i)) {
-                       min_cpu_irq = CPU_IRQ(i);
-                       min_loaded = i;
-               }
-       }
-       max_cpu_irq = ULONG_MAX;
-
-tryanothercpu:
-       /* Look for heaviest loaded processor.
-        * We may come back to get the next heaviest loaded processor.
-        * Skip processors with trivial loads.
-        */
-       tmp_cpu_irq = 0;
-       tmp_loaded = -1;
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_online(i))
-                       continue;
-               if (i != CPU_TO_PACKAGEINDEX(i))
-                       continue;
-               if (max_cpu_irq <= CPU_IRQ(i)) 
-                       continue;
-               if (tmp_cpu_irq < CPU_IRQ(i)) {
-                       tmp_cpu_irq = CPU_IRQ(i);
-                       tmp_loaded = i;
-               }
-       }
-
-       if (tmp_loaded == -1) {
-        /* In the case of small number of heavy interrupt sources, 
-         * loading some of the cpus too much. We use Ingo's original 
-         * approach to rotate them around.
-         */
-               if (!first_attempt && imbalance >= useful_load_threshold) {
-                       rotate_irqs_among_cpus(useful_load_threshold);
-                       return;
-               }
-               goto not_worth_the_effort;
-       }
-       
-       first_attempt = 0;              /* heaviest search */
-       max_cpu_irq = tmp_cpu_irq;      /* load */
-       max_loaded = tmp_loaded;        /* processor */
-       imbalance = (max_cpu_irq - min_cpu_irq) / 2;
-       
-       Dprintk("max_loaded cpu = %d\n", max_loaded);
-       Dprintk("min_loaded cpu = %d\n", min_loaded);
-       Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
-       Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
-       Dprintk("load imbalance = %lu\n", imbalance);
-
-       /* if imbalance is less than approx 10% of max load, then
-        * observe diminishing returns action. - quit
-        */
-       if (imbalance < (max_cpu_irq >> 3)) {
-               Dprintk("Imbalance too trivial\n");
-               goto not_worth_the_effort;
-       }
-
-tryanotherirq:
-       /* if we select an IRQ to move that can't go where we want, then
-        * see if there is another one to try.
-        */
-       move_this_load = 0;
-       selected_irq = -1;
-       for (j = 0; j < NR_IRQS; j++) {
-               /* Is this an active IRQ? */
-               if (!irq_desc[j].action)
-                       continue;
-               if (imbalance <= IRQ_DELTA(max_loaded,j))
-                       continue;
-               /* Try to find the IRQ that is closest to the imbalance
-                * without going over.
-                */
-               if (move_this_load < IRQ_DELTA(max_loaded,j)) {
-                       move_this_load = IRQ_DELTA(max_loaded,j);
-                       selected_irq = j;
-               }
-       }
-       if (selected_irq == -1) {
-               goto tryanothercpu;
-       }
-
-       imbalance = move_this_load;
-       
-       /* For physical_balance case, we accumlated both load
-        * values in the one of the siblings cpu_irq[],
-        * to use the same code for physical and logical processors
-        * as much as possible. 
-        *
-        * NOTE: the cpu_irq[] array holds the sum of the load for
-        * sibling A and sibling B in the slot for the lowest numbered
-        * sibling (A), _AND_ the load for sibling B in the slot for
-        * the higher numbered sibling.
-        *
-        * We seek the least loaded sibling by making the comparison
-        * (A+B)/2 vs B
-        */
-       load = CPU_IRQ(min_loaded) >> 1;
-       for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
-               if (load > CPU_IRQ(j)) {
-                       /* This won't change cpu_sibling_map[min_loaded] */
-                       load = CPU_IRQ(j);
-                       min_loaded = j;
-               }
-       }
-
-       cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
-       target_cpu_mask = cpumask_of_cpu(min_loaded);
-       cpus_and(tmp, target_cpu_mask, allowed_mask);
-
-       if (!cpus_empty(tmp)) {
-               irq_desc_t *desc = irq_desc + selected_irq;
-               unsigned long flags;
-
-               Dprintk("irq = %d moved to cpu = %d\n",
-                               selected_irq, min_loaded);
-               /* mark for change destination */
-               spin_lock_irqsave(&desc->lock, flags);
-               pending_irq_balance_cpumask[selected_irq] =
-                                       cpumask_of_cpu(min_loaded);
-               spin_unlock_irqrestore(&desc->lock, flags);
-               /* Since we made a change, come back sooner to 
-                * check for more variation.
-                */
-               balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
-                       balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);       
-               return;
-       }
-       goto tryanotherirq;
-
-not_worth_the_effort:
-       /*
-        * if we did not find an IRQ to move, then adjust the time interval
-        * upward
-        */
-       balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
-               balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);       
-       Dprintk("IRQ worth rotating not found\n");
-       return;
-}
-
-static int balanced_irq(void *unused)
-{
-       int i;
-       unsigned long prev_balance_time = jiffies;
-       long time_remaining = balanced_irq_interval;
-
-       daemonize("kirqd");
-       
-       /* push everything to CPU 0 to give us a starting point.  */
-       for (i = 0 ; i < NR_IRQS ; i++) {
-               pending_irq_balance_cpumask[i] = cpumask_of_cpu(0);
-       }
-
-       for ( ; ; ) {
-               set_current_state(TASK_INTERRUPTIBLE);
-               time_remaining = schedule_timeout(time_remaining);
-               try_to_freeze(PF_FREEZE);
-               if (time_after(jiffies,
-                               prev_balance_time+balanced_irq_interval)) {
-                       do_irq_balance();
-                       prev_balance_time = jiffies;
-                       time_remaining = balanced_irq_interval;
-               }
-       }
-       return 0;
-}
-
-static int __init balanced_irq_init(void)
-{
-       int i;
-       struct cpuinfo_x86 *c;
-       cpumask_t tmp;
-
-       cpus_shift_right(tmp, cpu_online_map, 2);
-        c = &boot_cpu_data;
-       /* When not overwritten by the command line ask subarchitecture. */
-       if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
-               irqbalance_disabled = NO_BALANCE_IRQ;
-       if (irqbalance_disabled)
-               return 0;
-       
-        /* disable irqbalance completely if there is only one processor online */
-       if (num_online_cpus() < 2) {
-               irqbalance_disabled = 1;
-               return 0;
-       }
-       /*
-        * Enable physical balance only if more than 1 physical processor
-        * is present
-        */
-       if (smp_num_siblings > 1 && !cpus_empty(tmp))
-               physical_balance = 1;
-
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_online(i))
-                       continue;
-               irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
-               irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
-               if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
-                       printk(KERN_ERR "balanced_irq_init: out of memory");
-                       goto failed;
-               }
-               memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
-               memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
-       }
-       
-       printk(KERN_INFO "Starting balanced_irq\n");
-       if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 
-               return 0;
-       else 
-               printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
-failed:
-       for (i = 0; i < NR_CPUS; i++) {
-               if(irq_cpu_data[i].irq_delta)
-                       kfree(irq_cpu_data[i].irq_delta);
-               if(irq_cpu_data[i].last_irq)
-                       kfree(irq_cpu_data[i].last_irq);
-       }
-       return 0;
-}
-
-int __init irqbalance_disable(char *str)
-{
-       irqbalance_disabled = 1;
-       return 0;
-}
-
-__setup("noirqbalance", irqbalance_disable);
-
-static inline void move_irq(int irq)
-{
-       /* note - we hold the desc->lock */
-       if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
-               set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
-               cpus_clear(pending_irq_balance_cpumask[irq]);
-       }
-}
-
-late_initcall(balanced_irq_init);
-
-#else /* !CONFIG_IRQBALANCE */
-static inline void move_irq(int irq) { }
-#endif /* CONFIG_IRQBALANCE */
-
-#ifndef CONFIG_SMP
-void fastcall send_IPI_self(int vector)
-{
-       unsigned int cfg;
-
-       /*
-        * Wait for idle.
-        */
-       apic_wait_icr_idle();
-       cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
-       /*
-        * Send the IPI. The write to APIC_ICR fires this off.
-        */
-       apic_write_around(APIC_ICR, cfg);
-}
-#endif /* !CONFIG_SMP */
-
 /*
  * Find the IRQ entry number of a certain pin.
  */
@@ -1610,7 +1176,6 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq)
  */
 static void ack_edge_ioapic_irq(unsigned int irq)
 {
-       move_irq(irq);
        if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
                                        == (IRQ_PENDING | IRQ_DISABLED))
                mask_IO_APIC_irq(irq);
@@ -1643,8 +1208,6 @@ static void mask_and_ack_level_ioapic_irq (unsigned int irq)
        unsigned long v;
        int i;
 
-       move_irq(irq);
-
        mask_IO_APIC_irq(irq);
 /*
  * It appears there is an erratum which affects at least version 0x11